# 加载gbm包
# if (!require(gbm)) install.packages("gbm")
# chooseCRANmirror()
# install.packages("gbm")

# 加载必要的包
# if (!require(caret)) install.packages("caret")
# if (!require(pROC)) install.packages("pROC")
# if (!require(MatchIt)) install.packages("MatchIt")
library(gbm)
library(dplyr)
library(caret)
library(pROC)
library(MatchIt)
# 服务器走不通报错 这个不进行适配 结束

# Set the working directory and read the modeling data set.
setwd("${path}")
mydata <- read.csv("bibliometric_nuomotu.csv")

# Columns 3 to 38 hold the predictor variables used by the models below.
cols <- names(mydata)[3:38]
names(mydata)
cols

# Alternative renderings of the predictor names:
#   colsdou         - each name wrapped in single quotes
#   independent_and - names joined with "+" (right-hand side of a formula)
#   independent_dou - quoted names joined with ","
colsdou <- paste0("'", cols, "'")
independent_and <- paste(cols, collapse = "+")
independent_dou <- paste(colsdou, collapse = ",")

# Make group_best a factor whose source values "0" / "1" are labelled
# "event_0" / "event_1".
mydata$group_best <- factor(
  mydata$group_best,
  levels = c("0", "1"),
  labels = c("event_0", "event_1")
)


# Nearest-neighbour matching with ratio = 3, using group_best as the
# treatment and every predictor column as a matching covariate.
m.out <- matchit(
  as.formula(paste("group_best", independent_and, sep = "~")),
  data = mydata,
  method = "nearest",
  ratio = 3
)

# Extract the matched sample.
matched_data <- match.data(m.out)

# Recode group_best as numeric: 1 where it equals "event_1", 0 for the
# other level.
matched_data$group_best <- as.numeric(matched_data$group_best == "event_1")
# Loss function for the GBM: group_best in matched_data is coded 0/1, so the
# Bernoulli distribution is used. (Cross-validation is configured through
# gbm's own cv.folds argument below.)
distribution <- "bernoulli"

# gbm draws random row subsamples for each tree and assigns CV folds at
# random through R's RNG. Fix the seed so the fitted model, and therefore
# the prediction files written later, are reproducible between runs.
set.seed(123)

# Train the GBM on the matched sample.
model_gbm <- gbm(
  formula = as.formula(paste0("group_best~", independent_and)),
  data = matched_data,
  distribution = distribution,
  n.trees = 100,
  interaction.depth = 3,
  shrinkage = 0.1,
  cv.folds = 5,
  verbose = FALSE
)

# Score the full modeling data set with the fitted GBM, using all trees.
# The raw predictions are passed through plogis() to obtain probabilities.
predictions_gbm <- predict(
  model_gbm,
  newdata = mydata,
  n.trees = model_gbm$n.trees
)
predicted_probs_gbm <- plogis(predictions_gbm)

# Classify with a 0.5 probability cut-off, using the same labels as
# mydata$group_best so the two columns can be compared directly.
predicted_classes_gbm <- factor(
  ifelse(predicted_probs_gbm >= 0.5, "event_1", "event_0"),
  levels = c("event_0", "event_1")
)

# Attach the predictions to the data set.
mydata_with_predictions_gbm <- mydata %>%
  mutate(
    group_best_probability = predicted_probs_gbm,
    group_best_predicted_class = predicted_classes_gbm,
    group_best_score = NA  # GBM has no scorecard, so the score is left empty
  )

# Count the rows where the observed and predicted class agree.
matching_rows <- mydata_with_predictions_gbm %>%
  filter(group_best == group_best_predicted_class) %>%
  nrow()
cat("Number of rows where group_best and group_best_predicted_class match:", matching_rows, "\n")

# Accuracy = matching rows / total rows.
total_rows <- nrow(mydata_with_predictions_gbm)
accuracy <- matching_rows / total_rows
# The original message reused the "Number of rows ..." label for this value.
cat("Accuracy (matching rows / total rows):", accuracy, "\n")

# Write the results for the modeling set.
write.csv(mydata_with_predictions_gbm, file = "mydata_with_predictions.csv", row.names = FALSE)
# Read the outcome data set and score it with the same fitted GBM.
mydata1 <- read.csv("bibliometric_nuomotuRes.csv")

# Raw predictions using all trees, then converted to probabilities.
predictions_gbm <- predict(
  model_gbm,
  newdata = mydata1,
  n.trees = model_gbm$n.trees
)
predicted_probs_gbm <- plogis(predictions_gbm)

# Classify with a 0.5 probability cut-off.
predicted_classes_gbm <- factor(
  ifelse(predicted_probs_gbm >= 0.5, "event_1", "event_0"),
  levels = c("event_0", "event_1")
)

# Attach the predictions to the outcome data set.
result_with_predictions_gbm <- mutate(
  mydata1,
  group_best_probability = predicted_probs_gbm,
  group_best_predicted_class = predicted_classes_gbm,
  group_best_score = NA  # GBM has no scorecard, so the score is left empty
)

# Write the results for the outcome set.
write.csv(
  result_with_predictions_gbm,
  file = "result_with_predictions.csv",
  row.names = FALSE
)
# End of script
